Metabolomics Data Analysis for Porites Bleaching 2019 Experiment

Metabolite Polarity Selection

With untargeted LC-MS, metabolites were detected under both negative and positive polarities. To prevent double counting metabolites in my analysis, I took the mean of each polarities across all samples and selected the metabolite that had the strongest ion intensity under that polarity.

#Read in required libraries
library("reshape")
#library(plyr)
library("dplyr")
library("tidyverse")
library("ggplot2")
library("arsenal")
library("Rmisc")
library(gridExtra)
library(ggpubr)
library(factoextra)
library(ropls)
library(mixOmics)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(lme4)
library(lmerTest)
library(car)
library(effects)
library(ggfortify)
library(cowplot)
library(vegan)
library(corrr)
library(ggcorrplot)
library(GGally)
library(broom)
library(cowplot)
library(RVAideMemoire)
library(arsenal)
library(patchwork)
library(tidyr)
library(ggrepel)
library(MetaboAnalystR)

#Import data

sample.info <- read.csv("../data/Metabolomics/Porites-Kevin_SampleInfo.csv")
peak.pos <- read.csv("../data/Metabolomics/Peaks_Pos.csv")
peak.neg <- read.csv("../data/Metabolomics/Peaks_Neg.csv")

#Removing unncessesary columns

peak.pos2 <- peak.pos[ -c(1:9)] 
peak.pos3 <- peak.pos2[ -c(2:9)] # if we need the blanks, change the 9 to 6

peak.neg2 <- peak.neg[ -c(1:9)] 
peak.neg3 <- peak.neg2[ -c(2:9)] # if we need the blanks, change the 9 to 6

#### Selecting polarity based on higher signal intensity ###

#obtain row means 
pos.mean <- data.frame(ID=peak.pos3[,1], Means=rowMeans(peak.pos3[,-1]))
neg.mean <- data.frame(ID=peak.neg3[,1], Means=rowMeans(peak.neg3[,-1]))

# Comparing polarity means of each metabolite, x = pos, y = neg 
cmp <- comparedf(pos.mean, neg.mean, by = "ID",
                 tol.factor = "labels")        # match only factor labels

n.diffs(cmp) #58 shared metabolites
## [1] 58
list.diffs <- as.data.frame(do.call(cbind, diffs(cmp))) #creating a dataframe with shared metabolites as a list

df.diffs <- data.frame(matrix(unlist(list.diffs), nrow=n.diffs(cmp), byrow=F),stringsAsFactors=FALSE) #converting list to dataframe

names(df.diffs)[1:7] <- c("var.x", "var.y", "ID", "values.x", "values.y", "row.x", "row.y") #changing column names

i <- c(4, 5) #specifying values column
df.diffs[ , i] <- apply(df.diffs[ , i], 2,            # changing values column to numeric
                        function(x) as.numeric(as.character(x)))

df.diffs$selected.pol <-  ifelse(df.diffs$values.x > df.diffs$values.y, 'x', 
                                 ifelse(df.diffs$values.x < df.diffs$values.y, 'y', 'tie')) #selecting polarity with highest mean

x.diff.keep <- df.diffs %>% 
  filter(selected.pol == "x") #extracting x selection

y.diff.keep <- df.diffs %>%
  filter(selected.pol == "y") #extracting y selection

x.all.keep <- peak.pos3[!(peak.pos3$compound %in% y.diff.keep$ID),] #removing rows where the metabolite is higher in neg df
y.all.keep <- peak.neg3[!(peak.neg3$compound %in% x.diff.keep$ID),] #removing rows where the metabolite is higher in pos df


# Re-shaping dataset 
peak.pos4<- melt(x.all.keep, id= "compound") #melting dataset 
peak.neg4<- melt(y.all.keep, id= "compound") #melting dataset 

# adding polarity 
peak.pos4$polarity <- "positive"
peak.neg4$polarity <- "negative"

# Binding positive and negative datasets together
peak.all <- rbind(peak.pos4, peak.neg4)

# Renaming column 
names(peak.all)[2] <- "Sample.ID"
names(peak.all)[3] <- "Raw.IonCount"

# Merging weight sample info
peak.all2 <- merge(peak.all, sample.info, by = "Sample.ID")

Normalization

#Normalization by weight

peak.all2$Raw.IonCount <- as.numeric(as.character(peak.all2$Raw.IonCount))
peak.all2$Norm.IonCount <- peak.all2$Raw.IonCount / peak.all2$Weight.mg

#Selecting columns of interest
peak.all3 <- peak.all2 %>% dplyr::select(Sample.ID, compound, Fragment.ID, Time, Treatment, Norm.IonCount)

#Reformatting dataframe so compounds are listed as column headers
peak.all4 <- peak.all3 %>% spread(compound, Norm.IonCount)

#adding 1000 to all variable to account for 0 values and log normalization 
#Log normalization (https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r)

peak.all5 <- log((peak.all4[5:184] + 1000), 2)

norm.data <- cbind(peak.all4[1:4], peak.all5)

write.csv(norm.data, "../output/Metabolomics/Norm_Data_All.csv")

Grouped PCA

Code inspired by: - http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/118-principal-component-analysis-in-r-prcomp-vs-princomp/ - https://github.com/urol-e5/timeseries/blob/master/time_series_analysis/integration_biological.Rmd line 2465

scaled_pca <-prcomp(norm.data[c(5:184)], scale=TRUE, center=TRUE)
fviz_eig(scaled_pca)

coral_info<-norm.data[c(3,4)]

pca_data<- scaled_pca%>%
  augment(coral_info)%>%
  group_by(Time, Treatment)%>%
  mutate(PC1.mean = mean(.fittedPC1),
         PC2.mean = mean(.fittedPC2))

pca.centroids<- pca_data%>% 
  dplyr::select(Time, Treatment, PC1.mean, PC2.mean)%>%
  dplyr::group_by(Time, Treatment)%>%
  dplyr::summarise(PC1.mean = mean(PC1.mean),
            PC2.mean = mean(PC2.mean))


#Examine PERMANOVA results.  

# scale data
vegan <- scale(norm.data[c(5:184)])

# PerMANOVA 
permanova<-adonis(vegan ~ Time*Treatment, data = norm.data, method='eu')
z_pca<-permanova$aov.tab
z_pca
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Time            2     875.0  437.52  2.8045 0.11048  0.001 ***
## Treatment       2     696.0  348.02  2.2308 0.08788  0.001 ***
## Time:Treatment  4     732.7  183.17  1.1741 0.09251  0.152    
## Residuals      36    5616.3  156.01         0.70912           
## Total          44    7920.0                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assemble plot with background points

#1. make plot with dots

#adding percentages on axis

names(pca_data)[4] <- "PCA1"
names(pca_data)[5] <- "PCA2"
percentage <- round((scaled_pca$sdev^2) / sum((scaled_pca$sdev^2)) * 100, 2)
percentage <- paste( colnames(pca_data[4:50]), "(", paste(as.character(percentage), "%", ")", sep="") )

#setting up data to add polygons
pca_data$Time.Treatment <- paste(pca_data$Time, pca_data$Treatment)
find_hull <- function(pca_data) pca_data[chull(pca_data$PCA1, pca_data$PCA2), ]
hulls <- ddply(pca_data, "Time.Treatment", find_hull)


PCA<-ggplot(pca_data, aes(PCA1, PCA2, color=Treatment)) + 
  geom_point(size = 4, alpha=0.2, aes(shape = Time))+
  scale_colour_manual(values=c("#46008B", "#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#46008B", "#8B0046", "#468B00")) + 
  scale_shape_manual(values=c(15, 17, 19)) +
  theme_classic()+
  ylim(-10,10)+
  xlim(-15,15)+
  ylab(percentage[2])+
  xlab(percentage[1])+
  geom_text(x=9, y=-8.25, label=paste("p(Day)=", z_pca$`Pr(>F)`[1]), size=7, color=ifelse(z_pca$`Pr(>F)`[1] < 0.05, "black", "darkgray")) + 
  geom_text(x=9, y=-9, label=paste("p(Group)=", z_pca$`Pr(>F)`[2]), size=7, color=ifelse(z_pca$`Pr(>F)`[2] < 0.05, "black", "darkgray")) + 
  geom_text(x=9, y=-9.75, label=paste("p(Day x Group)=", z_pca$`Pr(>F)`[3]), size=7, color=ifelse(z_pca$`Pr(>F)`[3] < 0.05, "black", "darkgray")) + 
  theme(legend.text = element_text(size=18), 
        legend.position=c(0.90,0.85),
        plot.background = element_blank(),
        legend.title = element_text(size=20), 
        plot.margin = margin(1, 1, 1, 1, "cm"),
        axis.text = element_text(size=18), 
        title = element_text(size=25, face="bold"), 
        axis.title = element_text(size=18))

#Add centroids  

#2. add centroids 
PCAcen<-PCA +  geom_polygon(data = hulls, alpha = 0.2, aes(color = Treatment, fill = Treatment, lty = Time)) +
  geom_point(aes(x=PC1.mean, y=PC2.mean,color=Treatment, shape = Time), data=pca.centroids, size=4, show.legend=FALSE) + 
  scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  scale_colour_manual(values=c("#46008B", "#8B0046", "#468B00"), breaks = c("Control_Ambient","Bleached_Hot", "Mortality_Hot"), labels = c("Control", "Bleached", "Partial Mortality")) +
  scale_fill_manual(values=c("#46008B", "#8B0046", "#468B00"), breaks = c("Control_Ambient","Bleached_Hot", "Mortality_Hot"), labels = c("Control", "Bleached", "Partial Mortality")) + 
  scale_shape_manual(values=c(15, 17, 19)) +
  labs(shape="Day", col="Group")+
  guides(fill = FALSE) +
  theme(legend.text = element_text(size=18), 
        legend.position=c(0.90,0.85),
        plot.background = element_blank(),
        legend.title = element_text(size=20), 
        plot.margin = margin(1, 1, 1, 1, "cm"),
        axis.text = element_text(size=18), 
        title = element_text(size=25, face="bold"), 
        axis.title = element_text(size=18))



#Add segments

#3. add segments
segpoints<-pca.centroids%>%
  gather(variable, value, -(Time:Treatment)) %>%
  unite(temp, Time, variable) %>%
  spread(temp, value)

PCAfull<-PCAcen + 
  geom_segment(aes(x = Day0_PC1.mean, y = Day0_PC2.mean, xend = Day37_PC1.mean, yend = Day37_PC2.mean, colour = Treatment), data = segpoints, size=2, show.legend=FALSE) +
  geom_segment(aes(x = Day37_PC1.mean, y = Day37_PC2.mean, xend = Day52_PC1.mean, yend = Day52_PC2.mean, colour = Treatment), data = segpoints, size=2, arrow = arrow(length=unit(0.5,"cm")), show.legend=FALSE); PCAfull

ggsave(filename="../output/Metabolomics/FullPCA_metabolomics.pdf", plot=PCAfull, dpi=300, width=12, height=12, units="in")

D52 ANALYSIS

PLS-DA on Timepoint 3 between each pair

Control vs Bleached

#Control vs Bleached

norm.data_2 <- column_to_rownames(norm.data, 'Sample.ID')

D52_CvsB <- norm.data_2 %>%
  filter(Time == "Day52") %>%
  filter(Treatment != "Mortality_Hot")

D52_CvsB_clean <- na.omit(D52_CvsB)

#assigning datasets 
X <- D52_CvsB[4:183]
Y <- as.factor(D52_CvsB$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples


#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Control")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp2
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp3
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp4
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp5
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp6
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
#VIP Extraction
D52_CvsB_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_CvsB_VIP_DF <- as.data.frame(D52_CvsB_VIP[["tab"]])

Control vs Partial Mortality

D52_CvsP <- norm.data_2 %>%
  filter(Time == "Day52") %>%
  filter(Treatment != "Bleached_Hot")

D52_CvsP_clean <- na.omit(D52_CvsP)

#assigning datasets 
X <- D52_CvsP[4:183]
Y <- as.factor(D52_CvsP$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples

#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Control vs Partial Mortality")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp2
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp3
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp4
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp5
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp6
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
#VIP Extraction
D52_CvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_CvsP_VIP_DF <- as.data.frame(D52_CvsP_VIP[["tab"]])

Bleached vs Partial Mortality

D52_BvsP <- norm.data_2 %>%
  filter(Time == "Day52") %>%
  filter(Treatment != "Control_Ambient")

D52_BvsP_clean <- na.omit(D52_BvsP)

#assigning datasets 
X <- D52_BvsP[4:183]
Y <- as.factor(D52_BvsP$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples


#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Partial Mortality")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp2
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp3
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp4
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp5
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp6
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
#VIP Extraction
D52_BvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_BvsP_VIP_DF <- as.data.frame(D52_BvsP_VIP[["tab"]])

VIP Comparisons for Final Timepoint

####### Overlaps for VIPs >1 #########

# Converting row names to column
D52_CvsB_VIP_table <- rownames_to_column(D52_CvsB_VIP_DF, var = "Metabolite")
D52_CvsP_VIP_table <- rownames_to_column(D52_CvsP_VIP_DF, var = "Metabolite")
D52_BvsP_VIP_table <- rownames_to_column(D52_BvsP_VIP_DF, var = "Metabolite")

# Filtering for VIP > 1
D52_CvsB_VIP_1 <- D52_CvsB_VIP_table %>% 
  filter(VIP >= 1)

D52_CvsP_VIP_1 <- D52_CvsP_VIP_table %>% 
  filter(VIP >= 1)

D52_BvsP_VIP_1 <- D52_BvsP_VIP_table %>% 
  filter(VIP >= 1)

D52_CvsB_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Control vs Bleached") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_CvsP_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Control vs Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_BvsP_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Bleached vs Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

### Compare for Venn Diagram (https://github.com/gaospecial/ggVennDiagram)

#Open this part in Repo

# nrow(D52_CvsB_VIP_1)
# nrow(D52_CvsP_VIP_1)
# 
# library("VennDiagram")
# 
# venn.x <- list(
#   D52_CvsB = sample(D52_CvsB_VIP_1$Metabolite), 
#   D52_CvsP = sample(D52_CvsP_VIP_1$Metabolite)
# )
# 
# myCol <- c("#8B0046", "#468B00")
# 
# venn.diagram(venn.x,
#              filename = 'output/Metabolomics/D52_BvsP_venn.png',
#              output = TRUE,
#              height = 480, 
#              width = 480,
#              resolution = 300,
#              category.names = c("Bleached", "Partial Mortality"),
#              lwd = 2,
#              col = c("#8B0046", "#468B00"),
#              fill = c(alpha("#8B0046",0.3), alpha("#468B00", 0.3)),
#              cex = 0.5,
#              fontfamily = "sans",
#              fontface = "bold",
#              cat.cex = 0.5,
#              cat.default.pos = "outer",
#              cat.pos = c(12, -12),
#              cat.dist = c(-0.45, -0.45),
#              cat.fontfamily = "sans",
#              cat.fontface = "bold"
#              )

T-test validation: Control vs Bleached

# Control vs Bleached

## Gather data frame and group by metabolite 
D52_CvB_gather <- D52_CvsB_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D52_CvB_gather$Treatment <- as.factor(D52_CvB_gather$Treatment)
D52_CvB_gather <- dplyr::group_by(D52_CvB_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D52_CvB_VIP_Select <- subset(D52_CvB_gather, D52_CvB_gather$Metabolite %in% D52_CvsB_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Control
D52_CvB_t.test <-do(D52_CvB_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D52_CvB_t.test$p.adj<-p.adjust(D52_CvB_t.test$p.value, method=c("fdr"), n=length(D52_CvsB_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D52_CvB_t.test.fdr <- D52_CvB_t.test %>% filter(p.adj <= 0.05)
length(D52_CvB_t.test.fdr$Metabolite) #10
## [1] 10
# # Filter for significantly different metabolites (p < 0.05)
# D52_CvB_t.test.sig <- D52_CvB_t.test %>% filter(p.value <= 0.05)
# length(D52_CvB_t.test.sig$Metabolite) #went from 32 to 10 with p value adjustment

# Filter for metabolites accumulated compared to control
D52_CvB_t.test.sig.up <- D52_CvB_t.test.fdr %>% filter(estimate1 > estimate2)

# Filter for metabolites depleted compared to control
D52_CvB_t.test.sig.down <- D52_CvB_t.test.fdr %>% filter(estimate1 < estimate2)

T-test validation: Control vs Partial Mortality

# Control vs Bleached

## Gather data frame and group by metabolite 
D52_CvsP_gather <- D52_CvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D52_CvsP_gather$Treatment <- as.factor(D52_CvsP_gather$Treatment)
D52_CvsP_gather <- dplyr::group_by(D52_CvsP_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D52_CvsP_VIP_Select <- subset(D52_CvsP_gather, D52_CvsP_gather$Metabolite %in% D52_CvsP_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Control_Ambient, estimate 2 = Bleached_Hot
D52_CvsP_t.test <-do(D52_CvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D52_CvsP_t.test$p.adj<-p.adjust(D52_CvsP_t.test$p.value, method=c("fdr"), n=length(D52_CvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D52_CvsP_t.test.fdr <- D52_CvsP_t.test %>% filter(p.adj <= 0.05)
length(D52_CvsP_t.test.fdr$Metabolite) #34
## [1] 34
# Filter for significantly different metabolites (p < 0.05)
# D52_CvsP_t.test.sig <- D52_CvsP_t.test %>% filter(p.value <= 0.05)
# length(D52_CvsP_t.test.sig$Metabolite) #went from 38 to 34 with p value adjustment

# Filter for metabolites accumulated compared to control
D52_CvsP_t.test.sig.up <- D52_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)

# Filter for metabolites depleted compared to control
D52_CvsP_t.test.sig.down <- D52_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)

T-test validation: Bleached vs Partial Mortality

# Bleach vs Partial Mortality

## Gather data frame and group by metabolite 
D52_BvsP_gather <- D52_BvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D52_BvsP_gather$Treatment <- as.factor(D52_BvsP_gather$Treatment)
D52_BvsP_gather <- dplyr::group_by(D52_BvsP_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D52_BvsP_VIP_Select <- subset(D52_BvsP_gather, D52_BvsP_gather$Metabolite %in% D52_BvsP_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Mortality_Hot
D52_BvsP_t.test <-do(D52_BvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D52_BvsP_t.test$p.adj<-p.adjust(D52_BvsP_t.test$p.value, method=c("fdr"), n=length(D52_BvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D52_BvsP_t.test.fdr <- D52_BvsP_t.test %>% filter(p.adj <= 0.05)
length(D52_BvsP_t.test.fdr$Metabolite) #0
## [1] 0
# Filter for significantly different metabolites (p < 0.05)
D52_CvsP_t.test.sig <- D52_BvsP_t.test %>% filter(p.value <= 0.05)
length(D52_CvsP_t.test.sig$Metabolite) #went from 10 to 0 with p value adjustment
## [1] 10
# Filter for metabolites accumulated compared to control
D52_CvsP_t.test.sig.up <- D52_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)

# Filter for metabolites depleted compared to control
D52_CvsP_t.test.sig.down <- D52_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)

VIP plotting after t-test validation

# Selecting metabolites that were validated by the t-test for each up and down accumulated
D52_CvsB_VIP_sig_up <- subset(D52_CvsB_VIP_1, D52_CvsB_VIP_1$Metabolite %in% D52_CvB_t.test.sig.up$Metabolite)
D52_CvsB_VIP_sig_down <- subset(D52_CvsB_VIP_1, D52_CvsB_VIP_1$Metabolite %in% D52_CvB_t.test.sig.down$Metabolite)

D52_CvsP_VIP_sig_up <- subset(D52_CvsP_VIP_1, D52_CvsP_VIP_1$Metabolite %in% D52_CvsP_t.test.sig.up$Metabolite)
D52_CvsP_VIP_sig_down <- subset(D52_CvsP_VIP_1, D52_CvsP_VIP_1$Metabolite %in% D52_CvsP_t.test.sig.down$Metabolite)

write.csv(D52_CvsB_VIP_sig_up, "../output/Metabolomics/D52_CvsB_VIP_sig_up.csv")
write.csv(D52_CvsB_VIP_sig_down, "../output/Metabolomics/D52_CvsB_VIP_sig_down.csv")
write.csv(D52_CvsP_VIP_sig_up, "../output/Metabolomics/D52_CvsP_VIP_sig_up.csv")
write.csv(D52_CvsP_VIP_sig_down, "../output/Metabolomics/D52_CvsP_VIP_sig_down.csv")

# Plotting

D52_CvsB_VIP_UP <- D52_CvsB_VIP_sig_up %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("Accumulated") +
  xlab("") +
  ggtitle("Bleached") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 

D52_CvsB_VIP_DOWN <- D52_CvsB_VIP_sig_down %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("Depleted") +
  xlab("VIP Score") +
#  ggtitle("Bleached: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 

D52_CvsP_VIP_UP <- D52_CvsP_VIP_sig_up %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("") +
  xlab("") +
  ggtitle("Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_CvsP_VIP_DOWN <- D52_CvsP_VIP_sig_down %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Partial Mortality: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_All_VIP_plot <- plot_grid(D52_CvsB_VIP_UP, D52_CvsP_VIP_UP, D52_CvsB_VIP_DOWN, D52_CvsP_VIP_DOWN, 
                              align = "v", 
                              ncol = 2, 
                              rel_heights = c(0.95, 0.35), 
                              labels = c("A", "B", "C", "D"))

D52_All_VIP_plot

#ggsave(filename="../output/Metabolomics/Day52_VIP.pdf", plot=D52_All_VIP_plot, dpi=300, width=7, height=9, units="in")

VIP plotting after t-test validation and comparing overlapping metabolites

# Up regulated metabolites that overlap between B and P

D52_overlap_BvsP_up_1 <- as.data.frame(intersect(D52_CvsB_VIP_sig_up$Metabolite, D52_CvsP_VIP_sig_up$Metabolite))
names(D52_overlap_BvsP_up_1)[1] <- 'Metabolite'
D52_overlap_BvsP_up_2 <- merge(D52_overlap_BvsP_up_1, D52_CvsB_VIP_sig_up, by="Metabolite")
D52_overlap_BvsP_up_VIP <- merge(D52_overlap_BvsP_up_2, D52_CvsP_VIP_sig_up, by="Metabolite")
names(D52_overlap_BvsP_up_VIP)[2] <- 'Bleached'
names(D52_overlap_BvsP_up_VIP)[3] <- 'Partial Mortality'

D52_overlap_BvsP_up_VIP_comb <- gather(D52_overlap_BvsP_up_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')

# Up regulated metabolites that are unique to B

D52_uniqueB_BvsP_up <- as.data.frame(setdiff(D52_CvsB_VIP_sig_up$Metabolite, D52_CvsP_VIP_sig_up$Metabolite))
names(D52_uniqueB_BvsP_up)[1] <- 'Metabolite'
D52_uniqueB_BvsP_up_VIP <- merge(D52_uniqueB_BvsP_up, D52_CvsB_VIP_sig_up, by="Metabolite")

# Up regulated metabolites that are unique to P

D52_uniqueP_BvsP_up <- as.data.frame(setdiff(D52_CvsP_VIP_sig_up$Metabolite, D52_CvsB_VIP_sig_up$Metabolite))
names(D52_uniqueP_BvsP_up)[1] <- 'Metabolite'
D52_uniqueP_BvsP_up_VIP <- merge(D52_uniqueP_BvsP_up, D52_CvsP_VIP_sig_up, by="Metabolite")

# Down regulated metabolites that overlap between B and P

D52_overlap_BvsP_down_1 <- as.data.frame(intersect(D52_CvsB_VIP_sig_down$Metabolite, D52_CvsP_VIP_sig_down$Metabolite))
names(D52_overlap_BvsP_down_1)[1] <- 'Metabolite'
D52_overlap_BvsP_down_2 <- merge(D52_overlap_BvsP_down_1, D52_CvsB_VIP_sig_down, by="Metabolite")
D52_overlap_BvsP_down_VIP <- merge(D52_overlap_BvsP_down_2, D52_CvsP_VIP_sig_down, by="Metabolite")
names(D52_overlap_BvsP_down_VIP)[2] <- 'Bleached'
names(D52_overlap_BvsP_down_VIP)[3] <- 'Partial Mortality'

D52_overlap_BvsP_down_VIP_comb <- gather(D52_overlap_BvsP_down_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')


# Down regulated metabolites that are unique to B 
# There are none 

# Down regulated metabolites that are unique to P

D52_uniqueP_BvsP_down <- as.data.frame(setdiff(D52_CvsP_VIP_sig_down$Metabolite, D52_CvsB_VIP_sig_down$Metabolite))
names(D52_uniqueP_BvsP_down)[1] <- 'Metabolite'
D52_uniqueP_BvsP_down_VIP <- merge(D52_uniqueP_BvsP_down, D52_CvsP_VIP_sig_down, by="Metabolite")


# Plotting

D52_uniqueB_BvsP_up_plot <- D52_uniqueB_BvsP_up_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#8B0046") +
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Bleached Unique") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D52_uniqueP_BvsP_up_plot <- D52_uniqueP_BvsP_up_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#468B00") +
  xlim(1.2, 2) +
  ylab("") +
  xlab("") +
#  ggtitle("Partial Mortality Unique") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D52_uniqueP_BvsP_down_plot <- D52_uniqueP_BvsP_down_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#468B00") +
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Partial Mortality: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D52_overlap_BvsP_up_plot <- D52_overlap_BvsP_up_VIP_comb %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, aes(color = Group))+
  scale_colour_manual(values=c("#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#8B0046", "#468B00")) + 
  xlim(1.2, 2) +
  ylab("") +
  xlab("") +
#  ggtitle("Bleached Overlap") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17),
                     legend.position = "none")

D52_overlap_BvsP_down_plot <- D52_overlap_BvsP_down_VIP_comb %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, aes(color = Group))+
  scale_colour_manual(values=c("#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#8B0046", "#468B00")) + 
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Bleached Overlap") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17), 
                     legend.text = element_text(size = 20),
                     legend.title = element_text(size = 22)) 


D52_All_VIP_plot <- D52_uniqueB_BvsP_up_plot +
                     D52_overlap_BvsP_up_plot +
                     D52_uniqueP_BvsP_up_plot +
                     guide_area() +
                     D52_overlap_BvsP_down_plot +
                     D52_uniqueP_BvsP_down_plot + 
                     plot_layout(ncol=3, heights=c(0.4,0.1), guides = "collect") 
 
ggsave(filename="../output/Metabolomics/Day52_VIP.png", plot=D52_All_VIP_plot, dpi=300, width=20, height=9, units="in")
Day_52_VIP

Day_52_VIP

Pathway Analysis

Day 52 Control vs Bleached

#library(MetaboAnalystR)

# D52 Control vs Bleached UP Overrepresentation Analysis and  via Metaboanalyst 4.0

D52_CvsB_VIP_sig_up
D52_CvsB_VIP_sig_up$Metabolite[D52_CvsB_VIP_sig_up$Metabolite == "NADPH (SIM)"] <- "NADPH"

mSet<-InitDataObjects("conc", "pathora", FALSE)
cmpd.vec<-c(D52_CvsB_VIP_sig_up$Metabolite)
mSet<-Setup.MapData(mSet, cmpd.vec)
mSet<-CrossReferencing(mSet, "name")
mSet<-CreateMappingResultTable(mSet)
mSet<-PerformDetailMatch(mSet, "NG-dimethyl-L-arginine")
mSet<-GetCandidateList(mSet)
mSet<-SetCandidate(mSet, "NG-dimethyl-L-arginine", "Asymmetric dimethylarginine") #Replacing metabolite
mSet<-CreateMappingResultTable(mSet)
mSet<-SetMetabolomeFilter(mSet, F)

D52_CvsB_up_dataset <- mSet$dataSet #extracting input dataframe
D52_CvsB_up_KEGG <- as.data.frame(D52_CvsB_up_dataset$map.table) #extracting KEGG terms

write.csv(D52_CvsB_up_KEGG, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsB_up_KEGG.csv")

## PATHWAY ENRICHMENT

#Model org: C elegans
#ORA analysis: Fisher's exact test
#Pathway topology analysis: relative betweenness centrality (rbc)

mSet_Path<-SetKEGG.PathLib(mSet, "cel", "current") #comparing again C.elegans, review parameters
mSet_Path<-SetMetabolomeFilter(mSet_Path, F) 
mSet_Path<-CalculateOraScore(mSet_Path, "rbc", "hyperg") #review parameters

D52_CvsB_up_analset_KEGG <- mSet_Path$api
D52_CvsB_up_PATHWAY <- as.data.frame(D52_CvsB_up_analset_KEGG$ora.results) #extracting KEGG terms

write.csv(D52_CvsB_up_PATHWAY, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsB_up_PATHWAY.csv")

D52_CvsB_up_PATHWAY 

# PLOTTING

D52_CvsB_up_PATHWAY <- tibble::rownames_to_column(D52_CvsB_up_PATHWAY, "Pathway")
names(D52_CvsB_up_PATHWAY)[5] <- "pvalue"
names(D52_CvsB_up_PATHWAY)[6] <- "neglogp"

logsig<- -log(0.05)

pointstolabel <- D52_CvsB_up_PATHWAY %>%
  filter(pvalue < 0.05)

D52_CvsB_up_PATHWAY_plot <- D52_CvsB_up_PATHWAY %>%
    mutate(color = ifelse(neglogp>logsig, "red", "grey")) %>%
    ggplot(aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color)) +
    geom_point(shape=21) +
    scale_color_identity() +
    scale_fill_identity() +
    geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
#    geom_text(data=subset(D52_CvsP_up_PATHWAY, neglogp>logsig), aes(Impact,neglogp,label=Pathway)) +
    ylab("-log(p)") +
    xlab("Pathway Impact") +
    theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17), 
                     legend.position="none") 

D52_CvsB_up_PATHWAY_plot

D52_CvsB_up_PATHWAY_plotting <- D52_CvsB_up_PATHWAY %>%
    mutate(color_plot = ifelse(neglogp>logsig, "red", "grey")) %>%
    mutate(plotname = ifelse(Pathway %in% pointstolabel$Pathway, Pathway, ""))

set.seed(42)
D52_CvsB_up_PATHWAY_plot <- ggplot(D52_CvsB_up_PATHWAY_plotting, 
                                   aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color_plot)) +
                            geom_point(shape=21) +
                            geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
                            geom_label_repel(aes(label = plotname), 
                                            min.segment.length = 0,
                                            arrow = arrow(length = unit(0.015, "npc")),
                                            size = 3.5, 
                                            fill = "white",
                                            box.padding = unit(2.5, "lines"), 
                                            point.padding = unit(0.8, "lines")) +
                            scale_color_identity() +
                            scale_fill_identity() +
                            ylab("-log(p)") +
                            xlab("Pathway Impact") +
                            theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                                             panel.grid.major = element_blank(),
                                             panel.grid.minor = element_blank(),
                                             axis.text = element_text(size=17, color="black"), 
                                             title = element_text(size=17, face="bold"), 
                                             axis.title = element_text(size=17), 
                                             legend.position="none")

D52_CvsB_up_PATHWAY_plot

-log(0.015242) #4.183701

Day 52 Control vs Partial Mortality

# D52 Control vs Partial Mortality UP Overrepresentation Analysis and  via Metaboanalyst 4.0

D52_CvsP_VIP_sig_up
D52_CvsP_VIP_sig_up$Metabolite[D52_CvsP_VIP_sig_up$Metabolite == "NADPH (SIM)"] <- "NADPH"

mSet<-InitDataObjects("conc", "pathora", FALSE)
cmpd.vec<-c(D52_CvsP_VIP_sig_up$Metabolite)
mSet<-Setup.MapData(mSet, cmpd.vec)
mSet<-CrossReferencing(mSet, "name")
mSet<-CreateMappingResultTable(mSet)
# mSet<-PerformDetailMatch(mSet, "Acetyl Proline")
# mSet<-GetCandidateList(mSet)
mSet<-PerformDetailMatch(mSet, "NG-dimethyl-L-arginine")
mSet<-GetCandidateList(mSet)
mSet<-SetCandidate(mSet, "NG-dimethyl-L-arginine", "Asymmetric dimethylarginine")
mSet<-PerformDetailMatch(mSet, "Pyrroline-5-carboxylic acid")
mSet<-GetCandidateList(mSet)
#mSet<-SetCandidate(mSet, "Pyrroline-5-carboxylic acid", "1-Pyrroline-5-carboxylic acid") #this isnt working for some reson
mSet<-CreateMappingResultTable(mSet)
mSet<-SetMetabolomeFilter(mSet, F)

D52_CvsP_up_dataset <- mSet$dataSet #extracting input dataframe
D52_CvsP_up_KEGG <- as.data.frame(D52_CvsP_up_dataset$map.table) #extracting KEGG terms

write.csv(D52_CvsP_up_KEGG, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsP_up_KEGG.csv")

## PATHWAY ENRICHMENT

#Model org: C elegans
#ORA analysis: Fisher's exact test
#Pathway topology analysis: relative betweenness centrality (rbc)

mSet_Path<-SetKEGG.PathLib(mSet, "cel", "current") #comparing again C.elegans, review parameters
mSet_Path<-SetMetabolomeFilter(mSet_Path, F) 
mSet_Path<-CalculateOraScore(mSet_Path, "rbc", "hyperg") #review parameters

D52_CvsP_up_analset_KEGG <- mSet_Path$api
D52_CvsP_up_PATHWAY <- as.data.frame(D52_CvsP_up_analset_KEGG$ora.results) #extracting KEGG terms

write.csv(D52_CvsP_up_PATHWAY, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsP_up_PATHWAY.csv")

D52_CvsP_up_PATHWAY 

# Plotting

D52_CvsP_up_PATHWAY <- tibble::rownames_to_column(D52_CvsP_up_PATHWAY, "Pathway")
names(D52_CvsP_up_PATHWAY)[5] <- "pvalue"
names(D52_CvsP_up_PATHWAY)[6] <- "neglogp"

logsig<- -log(0.05)

pointstolabel <- D52_CvsP_up_PATHWAY %>%
  filter(pvalue < 0.05)

D52_CvsP_up_PATHWAY_plotting <- D52_CvsP_up_PATHWAY %>%
    mutate(color_plot = ifelse(neglogp>logsig, "red", "grey")) %>%
    mutate(plotname = ifelse(Pathway %in% pointstolabel$Pathway, Pathway, ""))

set.seed(42)
D52_CvsP_up_PATHWAY_plot <- ggplot(D52_CvsP_up_PATHWAY_plotting, 
                                   aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color_plot)) +
                            geom_point(shape=21) +
                            geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
                            geom_label_repel(aes(label = plotname), 
                                            min.segment.length = 0,
                                            arrow = arrow(length = unit(0.015, "npc")),
                                            size = 3.5, 
                                            fill = "white",
                                            box.padding = unit(2.5, "lines"), 
                                            point.padding = unit(0.8, "lines")) +
                            scale_color_identity() +
                            scale_fill_identity() +
                            ylab("-log(p)") +
                            xlab("Pathway Impact") +
                            theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                                             panel.grid.major = element_blank(),
                                             panel.grid.minor = element_blank(),
                                             axis.text = element_text(size=17, color="black"), 
                                             title = element_text(size=17, face="bold"), 
                                             axis.title = element_text(size=17), 
                                             legend.position="none")

D52_CvsP_up_PATHWAY_plot

# https://ggrepel.slowkow.com/articles/examples.html

D37 ANALYSIS

PLS-DA on Timepoint 37 between each pair

Control vs Bleached

#Control vs Bleached

norm.data_2 <- column_to_rownames(norm.data, 'Sample.ID')

D37_CvsB <- norm.data_2 %>%
  filter(Time == "Day37") %>%
  filter(Treatment != "Mortality_Hot")

D37_CvsB_clean <- na.omit(D37_CvsB)

#assigning datasets 
X <- D37_CvsB[4:183]
Y <- as.factor(D37_CvsB$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples


#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 37 - Bleached vs Control")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp2
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp3
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp4
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp5
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp6
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
#VIP Extraction
D37_CvsB_VIP <- PLSDA.VIP(MyResult.plsda2)
D37_CvsB_VIP_DF <- as.data.frame(D52_CvsB_VIP[["tab"]])

Control vs Partial Mortality

D37_CvsP <- norm.data_2 %>%
  filter(Time == "Day37") %>%
  filter(Treatment != "Bleached_Hot")

D37_CvsP_clean <- na.omit(D37_CvsP)

#assigning datasets 
X <- D37_CvsP[4:183]
Y <- as.factor(D37_CvsP$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples

#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 37 - Control vs Partial Mortality")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp2
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp3
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp4
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp5
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp6
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
#VIP Extraction
D37_CvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D37_CvsP_VIP_DF <- as.data.frame(D37_CvsP_VIP[["tab"]])

Bleached vs Partial Mortality

D37_BvsP <- norm.data_2 %>%
  filter(Time == "Day37") %>%
  filter(Treatment != "Control_Ambient")

D37_BvsP_clean <- na.omit(D37_BvsP)

#assigning datasets 
X <- D37_BvsP[4:183]
Y <- as.factor(D37_BvsP$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples


#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 37 - Bleached vs Partial Mortality")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp2
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp3
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp4
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp5
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp6
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
#VIP Extraction
D37_BvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D37_BvsP_VIP_DF <- as.data.frame(D37_BvsP_VIP[["tab"]])

VIP Comparisons for D37 Timepoint

####### Overlaps for VIPs >1 #########

# Converting row names to column
D37_CvsB_VIP_table <- rownames_to_column(D37_CvsB_VIP_DF, var = "Metabolite")
D37_CvsP_VIP_table <- rownames_to_column(D37_CvsP_VIP_DF, var = "Metabolite")
D37_BvsP_VIP_table <- rownames_to_column(D37_BvsP_VIP_DF, var = "Metabolite")

# Filtering for VIP > 1
D37_CvsB_VIP_1 <- D37_CvsB_VIP_table %>% 
  filter(VIP >= 1)

D37_CvsP_VIP_1 <- D37_CvsP_VIP_table %>% 
  filter(VIP >= 1)

D37_BvsP_VIP_1 <- D37_BvsP_VIP_table %>% 
  filter(VIP >= 1)

D37_CvsB_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Control vs Bleached") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D37_CvsP_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Control vs Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D37_BvsP_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Bleached vs Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

### Compare for Venn Diagram (https://github.com/gaospecial/ggVennDiagram)

#Open this part in Repo

# nrow(D52_CvsB_VIP_1)
# nrow(D52_CvsP_VIP_1)
# 
# library("VennDiagram")
# 
# venn.x <- list(
#   D52_CvsB = sample(D52_CvsB_VIP_1$Metabolite), 
#   D52_CvsP = sample(D52_CvsP_VIP_1$Metabolite)
# )
# 
# myCol <- c("#8B0046", "#468B00")
# 
# venn.diagram(venn.x,
#              filename = 'output/Metabolomics/D52_BvsP_venn.png',
#              output = TRUE,
#              height = 480, 
#              width = 480,
#              resolution = 300,
#              category.names = c("Bleached", "Partial Mortality"),
#              lwd = 2,
#              col = c("#8B0046", "#468B00"),
#              fill = c(alpha("#8B0046",0.3), alpha("#468B00", 0.3)),
#              cex = 0.5,
#              fontfamily = "sans",
#              fontface = "bold",
#              cat.cex = 0.5,
#              cat.default.pos = "outer",
#              cat.pos = c(12, -12),
#              cat.dist = c(-0.45, -0.45),
#              cat.fontfamily = "sans",
#              cat.fontface = "bold"
#              )

T-test validation: Control vs Bleached

# Control vs Bleached

## Gather data frame and group by metabolite 
D37_CvB_gather <- D37_CvsB_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D37_CvB_gather$Treatment <- as.factor(D37_CvB_gather$Treatment)
D37_CvB_gather <- dplyr::group_by(D37_CvB_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D37_CvB_VIP_Select <- subset(D37_CvB_gather, D37_CvB_gather$Metabolite %in% D37_CvsB_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Control
D37_CvB_t.test <-do(D37_CvB_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D37_CvB_t.test$p.adj<-p.adjust(D37_CvB_t.test$p.value, method=c("fdr"), n=length(D37_CvsB_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D37_CvB_t.test.fdr <- D37_CvB_t.test %>% filter(p.adj <= 0.05)
length(D37_CvB_t.test.fdr$Metabolite) #3
## [1] 3
# # Filter for significantly different metabolites (p < 0.05)
D37_CvB_t.test.sig <- D37_CvB_t.test %>% filter(p.value <= 0.05)
length(D37_CvB_t.test.sig$Metabolite) #went from 18 to 3 with p value adjustment
## [1] 18
# Filter for metabolites accumulated compared to control
D37_CvB_t.test.sig.up <- D37_CvB_t.test.fdr %>% filter(estimate1 > estimate2)

# Filter for metabolites depleted compared to control
D37_CvB_t.test.sig.down <- D37_CvB_t.test.fdr %>% filter(estimate1 < estimate2)

T-test validation: Control vs Partial Mortality

# Control vs Bleached

## Gather data frame and group by metabolite 
D37_CvsP_gather <- D37_CvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D37_CvsP_gather$Treatment <- as.factor(D37_CvsP_gather$Treatment)
D37_CvsP_gather <- dplyr::group_by(D37_CvsP_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D37_CvsP_VIP_Select <- subset(D37_CvsP_gather, D37_CvsP_gather$Metabolite %in% D37_CvsP_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Control_Ambient, estimate 2 = Bleached_Hot
D37_CvsP_t.test <-do(D37_CvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D37_CvsP_t.test$p.adj<-p.adjust(D37_CvsP_t.test$p.value, method=c("fdr"), n=length(D37_CvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D37_CvsP_t.test.fdr <- D37_CvsP_t.test %>% filter(p.adj <= 0.05)
length(D37_CvsP_t.test.fdr$Metabolite) #12
## [1] 12
# Filter for significantly different metabolites (p < 0.05)
D37_CvsP_t.test.sig <- D37_CvsP_t.test %>% filter(p.value <= 0.05)
length(D37_CvsP_t.test.sig$Metabolite) #went from 26 to 12 with p value adjustment
## [1] 26
# Filter for metabolites accumulated compared to control
D37_CvsP_t.test.sig.up <- D37_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)

# Filter for metabolites depleted compared to control
D37_CvsP_t.test.sig.down <- D37_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)

T-test validation: Bleached vs Partial Mortality

# Bleach vs Partial Mortality

## Gather data frame and group by metabolite 
D37_BvsP_gather <- D37_BvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D37_BvsP_gather$Treatment <- as.factor(D37_BvsP_gather$Treatment)
D37_BvsP_gather <- dplyr::group_by(D37_BvsP_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D37_BvsP_VIP_Select <- subset(D37_BvsP_gather, D37_BvsP_gather$Metabolite %in% D37_BvsP_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Mortality_Hot
D37_BvsP_t.test <-do(D37_BvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D37_BvsP_t.test$p.adj<-p.adjust(D37_BvsP_t.test$p.value, method=c("fdr"), n=length(D37_BvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D37_BvsP_t.test.fdr <- D37_BvsP_t.test %>% filter(p.adj <= 0.05)
length(D37_BvsP_t.test.fdr$Metabolite) #0
## [1] 0
# Filter for significantly different metabolites (p < 0.05)
D37_CvsP_t.test.sig <- D37_BvsP_t.test %>% filter(p.value <= 0.05)
length(D37_CvsP_t.test.sig$Metabolite) #went from 10 to 0 with p value adjustment
## [1] 11
# Filter for metabolites accumulated compared to control
D37_CvsP_t.test.sig.up <- D37_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)

# Filter for metabolites depleted compared to control
D37_CvsP_t.test.sig.down <- D37_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)

VIP plotting after t-test validation

# Selecting metabolites that were validated by the t-test for each up and down accumulated
D37_CvsB_VIP_sig_up <- subset(D37_CvsB_VIP_1, D37_CvsB_VIP_1$Metabolite %in% D37_CvB_t.test.sig.up$Metabolite)
D37_CvsB_VIP_sig_down <- subset(D37_CvsB_VIP_1, D37_CvsB_VIP_1$Metabolite %in% D37_CvB_t.test.sig.down$Metabolite)

D37_CvsP_VIP_sig_up <- subset(D37_CvsP_VIP_1, D37_CvsP_VIP_1$Metabolite %in% D37_CvsP_t.test.sig.up$Metabolite)
D37_CvsP_VIP_sig_down <- subset(D37_CvsP_VIP_1, D37_CvsP_VIP_1$Metabolite %in% D37_CvsP_t.test.sig.down$Metabolite)

write.csv(D37_CvsB_VIP_sig_up, "../output/Metabolomics/D37_CvsB_VIP_sig_up.csv")
write.csv(D37_CvsB_VIP_sig_down, "../output/Metabolomics/D37_CvsB_VIP_sig_down.csv")
write.csv(D37_CvsP_VIP_sig_up, "../output/Metabolomics/D37_CvsP_VIP_sig_up.csv")
write.csv(D37_CvsP_VIP_sig_down, "../output/Metabolomics/D37_CvsP_VIP_sig_down.csv")

# Plotting

D37_CvsB_VIP_UP <- D37_CvsB_VIP_sig_up %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1, 2.3) +
  ylab("Accumulated") +
  xlab("") +
  ggtitle("Bleached") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 

D37_CvsB_VIP_DOWN <- D37_CvsB_VIP_sig_down %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1, 2.3) +
  ylab("Depleted") +
  xlab("VIP Score") +
#  ggtitle("Bleached: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 

D37_CvsP_VIP_UP <- D37_CvsP_VIP_sig_up %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1, 2.3) +
  ylab("") +
  xlab("") +
  ggtitle("Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D37_CvsP_VIP_DOWN <- D37_CvsP_VIP_sig_down %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1, 2.3) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Partial Mortality: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D37_All_VIP_plot <- plot_grid(D37_CvsB_VIP_UP, D37_CvsP_VIP_UP, D37_CvsB_VIP_DOWN, D37_CvsP_VIP_DOWN, 
                              align = "v", 
                              ncol = 2, 
                              rel_heights = c(0.95, 0.35), 
                              labels = c("A", "B", "C", "D"))

D37_All_VIP_plot

#ggsave(filename="../output/Metabolomics/Day37_VIP.pdf", plot=D37_All_VIP_plot, dpi=300, width=7, height=9, units="in")

VIP plotting after t-test validation and comparing overlapping metabolites

# Up regulated metabolites that overlap between B and P

D37_overlap_BvsP_up_1 <- as.data.frame(intersect(D37_CvsB_VIP_sig_up$Metabolite, D37_CvsP_VIP_sig_up$Metabolite))
names(D37_overlap_BvsP_up_1)[1] <- 'Metabolite'
D37_overlap_BvsP_up_2 <- merge(D37_overlap_BvsP_up_1, D37_CvsB_VIP_sig_up, by="Metabolite")
D37_overlap_BvsP_up_VIP <- merge(D37_overlap_BvsP_up_2, D37_CvsP_VIP_sig_up, by="Metabolite")
names(D37_overlap_BvsP_up_VIP)[2] <- 'Bleached'
names(D37_overlap_BvsP_up_VIP)[3] <- 'Partial Mortality'

D37_overlap_BvsP_up_VIP_comb <- gather(D37_overlap_BvsP_up_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')

# Up regulated metabolites that are unique to B

D37_uniqueB_BvsP_up <- as.data.frame(setdiff(D37_CvsB_VIP_sig_up$Metabolite, D37_CvsP_VIP_sig_up$Metabolite))
names(D37_uniqueB_BvsP_up)[1] <- 'Metabolite'
D37_uniqueB_BvsP_up_VIP <- merge(D37_uniqueB_BvsP_up, D37_CvsB_VIP_sig_up, by="Metabolite")

# Up regulated metabolites that are unique to P

D37_uniqueP_BvsP_up <- as.data.frame(setdiff(D37_CvsP_VIP_sig_up$Metabolite, D37_CvsB_VIP_sig_up$Metabolite))
names(D37_uniqueP_BvsP_up)[1] <- 'Metabolite'
D37_uniqueP_BvsP_up_VIP <- merge(D37_uniqueP_BvsP_up, D37_CvsP_VIP_sig_up, by="Metabolite")

# Down regulated metabolites that overlap between B and P

D37_overlap_BvsP_down_1 <- as.data.frame(intersect(D37_CvsB_VIP_sig_down$Metabolite, D37_CvsP_VIP_sig_down$Metabolite))
names(D37_overlap_BvsP_down_1)[1] <- 'Metabolite'
D37_overlap_BvsP_down_2 <- merge(D37_overlap_BvsP_down_1, D37_CvsB_VIP_sig_down, by="Metabolite")
D37_overlap_BvsP_down_VIP <- merge(D37_overlap_BvsP_down_2, D37_CvsP_VIP_sig_down, by="Metabolite")
names(D37_overlap_BvsP_down_VIP)[2] <- 'Bleached'
names(D37_overlap_BvsP_down_VIP)[3] <- 'Partial Mortality'

D37_overlap_BvsP_down_VIP_comb <- gather(D37_overlap_BvsP_down_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')


# Down regulated metabolites that are unique to B 
# There are none 

# Down regulated metabolites that are unique to P

D37_uniqueP_BvsP_down <- as.data.frame(setdiff(D37_CvsP_VIP_sig_down$Metabolite, D37_CvsB_VIP_sig_down$Metabolite))
names(D37_uniqueP_BvsP_down)[1] <- 'Metabolite'
D37_uniqueP_BvsP_down_VIP <- merge(D37_uniqueP_BvsP_down, D37_CvsP_VIP_sig_down, by="Metabolite")


# Plotting

D37_uniqueB_BvsP_up_plot <- D37_uniqueB_BvsP_up_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#8B0046") +
  xlim(1, 2.3) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Bleached Unique") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D37_uniqueP_BvsP_up_plot <- D37_uniqueP_BvsP_up_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#468B00") +
  xlim(1, 2.3) +
  ylab("") +
  xlab("") +
#  ggtitle("Partial Mortality Unique") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D37_uniqueP_BvsP_down_plot <- D37_uniqueP_BvsP_down_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#468B00") +
  xlim(1, 2.3) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Partial Mortality: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D37_overlap_BvsP_up_plot <- D37_overlap_BvsP_up_VIP_comb %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, aes(color = Group))+
  scale_colour_manual(values=c("#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#8B0046", "#468B00")) + 
  xlim(1, 2.3) +
  ylab("") +
  xlab("") +
#  ggtitle("Bleached Overlap") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17),
                     legend.position = "none")

D37_overlap_BvsP_down_plot <- D37_overlap_BvsP_down_VIP_comb %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, aes(color = Group))+
  scale_colour_manual(values=c("#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#8B0046", "#468B00")) + 
  xlim(1, 2.3) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Bleached Overlap") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17), 
                     legend.text = element_text(size = 20),
                     legend.title = element_text(size = 22)) 


D37_All_VIP_plot <- D37_uniqueB_BvsP_up_plot +
                     D37_overlap_BvsP_up_plot +
#                     D37_uniqueP_BvsP_up_plot +
                     guide_area() +
                     D37_overlap_BvsP_down_plot +
#                     D37_uniqueP_BvsP_down_plot + 
                     plot_layout(ncol=2, heights=c(0.4,0.1), guides = "collect") 
 
ggsave(filename="../output/Metabolomics/Day37_VIP.png", plot=D37_All_VIP_plot, dpi=300, width=20, height=9, units="in")